/*----------------------------------------\
| Bootstrap TTEST; |
|-------------------------------------------|
|--------------------------------------------------------------------|
|---------------------------|
| Arguments: |
| indata: required; the input dataset to resample from; |
| var: required; the variable to bootstrap; |
| group: optional; the grouping variable, if any; |
| label: optional; the value of the group variable to keep, if any; |
| nrun: required; the number of resamples to draw; |
| seed: optional; the starting seed, defaults to 1863822967; |
| outdata: the output dataset; |
| outseed: the name of a macro variable that receives the last seed, |
| so that several resampling runs can be chained; |
|--------------------------------|
|--------------------------------------------------------------------|
|---------------------------------------|
| Example: |
| %rspmeans(indata=ridamy.ittbur1, out=ittbur, group=rassignt, |
| var=ittbur); |
| %resample(indata=ittbur ,var=ittbur, group=rassignt, label="ON", |
| nrun=5000, seed=1863822967, outdata=brdnon, |
| outseed=myseed); |
| %resample(indata=ittbur ,var=ittbur, group=rassignt, label="OFF",|
| nrun=5000, seed=&myseed, outdata=brdnoff, |
| outseed=myseed); |
| %printout(indata=ittbur, var=ittbur, group=rassignt, |
| boot1=brdnon, boot2=brdnoff, nrun=5000); |
| Usage: %resample(indata= ,var=, group=, label=, nrun=, |
| seed=1863822967, outdata=, outseed=); |
\----------------------------------------*/
%macro resample(indata= ,var=, group=, label=, nrun=, seed=1863822967, outdata=, outseed=);
    /*------------------------------------------------------------------
     | RESAMPLE: bootstrap (sampling with replacement) summary statistics.
     |
     | For each of NRUN iterations the macro draws, with replacement, as
     | many rows as the (optionally filtered) input holds, runs PROC MEANS
     | on VAR and appends one row of statistics (N, mean, std, min, max,
     | median, p25, p75) keyed by resampleid to OUTDATA.
     |
     | Parameters
     |   indata   input dataset (required)
     |   var      analysis variable (required)
     |   group    grouping variable; only used together with LABEL
     |   label    value of GROUP to keep (WHERE group=label); pass it
     |            exactly as it must appear in the WHERE clause,
     |            e.g. "ON" including the quotes
     |   nrun     number of resamples; must pass the chk_type test and
     |            be at least 1
     |   seed     starting seed for CALL RANUNI; when it does not pass
     |            the chk_type test, 679897321 is used instead
     |   outdata  output dataset; an existing one is dropped first
     |   outseed  name of a global macro variable that receives the seed
     |            left after the last draw; it must not be SEED because
     |            that name is already a parameter of this macro
     |
     | Side effects: creates and removes the tables _group_, _resample_
     | and _resamplemean_; switches NOTES off while looping and restores
     | the previous setting afterwards.
     |
     | NOTE(review): chk_type is defined outside this file; it is assumed
     | to return 1 for a numeric value - confirm.
     | NOTE(review): the DATA step uses i, j, n and r as work variables;
     | VAR or GROUP carrying one of those names would collide.
     *-----------------------------------------------------------------*/
    %local _seed_ _rsnobs_ _ij_ _bygroup_ _notes_
           _rsstarttime_ _rsduration_ _rsday_ _rshour_ _rsminute_
           _rssecond_ _rsdurnote_;
    %let _seed_=;
    %let _rsstarttime_=%sysfunc(datetime());

    /* %GLOBAL on a name that is local to this macro is an error. */
    %if %qupcase(&outseed) eq SEED %then %do;
        %put ==> Alert! The output seed cannot be "seed" or "SEED".;
        %goto finish;
    %end;
    %if %length(&outseed) gt 0 %then %do;
        %global &outseed;
    %end;

    /* Remove leftovers from an earlier run. EXIST() is only called when
       a name is available: macro %IF does not short-circuit, and a call
       with no argument is an error. */
    %if %sysfunc(exist(_group_)) %then %do;
        proc sql noprint;
            drop table _group_;
        quit;
    %end;
    %if %length(%superq(outdata)) gt 0 %then %do;
        %if %sysfunc(exist(&outdata)) %then %do;
            proc sql noprint;
                drop table &outdata;
            quit;
        %end;
    %end;

    /* Validate everything before any work is done, and stop on failure
       rather than running the loop against missing inputs. */
    %if %length(%superq(indata)) eq 0 or %length(%superq(var)) eq 0 %then %do;
        %put ==> Alert! No dataset or no variables is provided for bootstrap!;
        %goto finish;
    %end;
    %if (%chk_type(&nrun) = 1) %then %do;
        %if (&nrun lt 1) %then %do;
            %put ==> Alert! The number of times you want to resample cannot be less than 1.;
            %goto finish;
        %end;
    %end;
    %else %do;
        %put ==> Alert! You must choose the number of times you want to do resampling;
        %goto finish;
    %end;

    %if (%chk_type(&seed) = 1) %then %let _seed_=&seed;
    %else %let _seed_=679897321;

    /* The group filter applies only when both GROUP and LABEL are given. */
    %let _bygroup_=%eval(%length(%superq(group)) gt 0 and %length(%superq(label)) gt 0);

    /* Build the pool of rows to sample from. */
    proc sql noprint;
        create table _group_ as
        select &var
            %if &_bygroup_ %then %do;
               , &group
            %end;
        from &indata
            %if &_bygroup_ %then %do;
        where &group=&label
            %end;
        ;
    quit;
    %if not %sysfunc(exist(_group_)) %then %do;
        %put ==> Alert! Could not build the resampling pool from dataset &indata (variable &var).;
        %goto finish;
    %end;

    /* Remember the NOTES setting so it can be put back unchanged. */
    %let _notes_=%sysfunc(getoption(notes));
    %put NOTE: Wait! Macro RESAMPLE is running...;
    options nonotes;

    %do _ij_=1 %to &nrun;
        /* One resample: n uniform draws mapped to row numbers 1..n. The
           seed left by CALL RANUNI is carried into the next iteration so
           that all draws form a single random stream. */
        data _resample_;
            _seed_=&_seed_;
            do j=1 to n;
                call ranuni(_seed_, r);
                resampleid=&_ij_;
                i=ceil(r*n);
                set _group_ point=i nobs=n;
                output;
            end;
            /* SYMPUTX stores the numbers without padding blanks. */
            call symputx("_seed_", _seed_);
            call symputx("_rsnobs_", n);
            stop;
            keep &var resampleid %if &_bygroup_ %then %do; &group %end;;
        run;

        proc means data=_resample_ noprint;
            by resampleid %if &_bygroup_ %then %do; &group %end;;
            var &var;
            output out=_resamplemean_(drop=_TYPE_ _FREQ_) N=N mean=mean std=std min=min max=max median=median p25=p25 p75=p75 ;
        run;

        /* PROC APPEND creates the BASE= dataset on the first pass. */
        %if %length(%superq(outdata)) gt 0 %then %do;
            proc append base=&outdata data=_resamplemean_;
            run;
        %end;
    %end;

    /* Hand the final seed back so that a following call can continue
       the same random stream. */
    %if %length(&outseed) gt 0 %then %let &outseed=&_seed_;

    /* Drop the working tables once, after the last iteration. */
    proc sql noprint;
        %if %sysfunc(exist(_resample_)) %then %do;
            drop table _resample_;
        %end;
        %if %sysfunc(exist(_resamplemean_)) %then %do;
            drop table _resamplemean_;
        %end;
        %if %sysfunc(exist(_group_)) %then %do;
            drop table _group_;
        %end;
    quit;
    options &_notes_;

    /* Elapsed time, split into days / hours / minutes / seconds. */
    %let _rsduration_=%sysevalf(%sysfunc(datetime())-&_rsstarttime_);
    %let _rsday_=%sysevalf(&_rsduration_/86400, floor);
    %let _rshour_=%sysfunc(hour(&_rsduration_));
    %let _rsminute_=%sysfunc(minute(&_rsduration_));
    %let _rssecond_=%sysfunc(round(%sysfunc(second(&_rsduration_)), .01));
    %let _rsdurnote_=;
    %if (&_rsday_ gt 0) %then %let _rsdurnote_=&_rsdurnote_ &_rsday_ days;
    %if (&_rshour_ gt 0) %then %let _rsdurnote_=&_rsdurnote_ &_rshour_ hours;
    %if (&_rsminute_ gt 0) %then %let _rsdurnote_=&_rsdurnote_ &_rsminute_ minutes;
    /* Seconds may be fractional, so compare with %SYSEVALF. */
    %if %sysevalf(&_rssecond_ gt 0) %then %let _rsdurnote_=&_rsdurnote_ &_rssecond_ seconds;
    %if %length(&_rsdurnote_) eq 0 %then %let _rsdurnote_=less than 0.01 seconds;
    %put NOTE: The procedure %nrbquote(%)%upcase(resample) has resampled &nrun samples, each with &_rsnobs_ observations, in &_rsdurnote_.. ;
%finish:
%mend resample;